Author: Leonardo Espin
Date: 10/2/2019
Below I train a standard feed-forward neural network and a convolutional neural network on the MNIST handwritten digits dataset.
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
# Load the MNIST train/test splits (images and integer labels) through Keras.
(x_train_i, y_train), (x_test_i, y_test) = keras.datasets.mnist.load_data()

# Report the shape of each of the four arrays, one per line.
print(
    *(arr.shape for arr in (x_train_i, y_train, x_test_i, y_test)),
    sep="\n",
)

# Sanity check: draw the first training image and print its label.
plt.imshow(x_train_i[0])
print(y_train[0])
# Problem constants: number of digit classes and the image height/width.
num_classes = 10
img_rows, img_cols = 28, 28

# Normalize the pixel data to the 0-1 range.
x_train_i = x_train_i / 255
x_test_i = x_test_i / 255

# Flatten every image into one row of img_rows*img_cols values so it can be
# fed to the dense (feed-forward) network.
x_train = x_train_i.reshape(len(x_train_i), img_rows * img_cols)
x_test = x_test_i.reshape(len(x_test_i), img_rows * img_cols)

# Convert the integer class labels into binary (one-hot) class matrices.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)
print(y_train.shape)
print(y_test.shape)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Feed-forward classifier: two 25-node ReLU hidden layers followed by a
# softmax prediction layer.
model = Sequential([
    # First hidden layer. It also tells the model which input shape to
    # expect: one flattened image of img_rows*img_cols values.
    Dense(25, input_shape=(img_rows * img_cols,), activation='relu'),
    # Second hidden layer, again with 25 nodes.
    Dense(25, activation='relu'),
    # Prediction layer: softmax turns the outputs into one probability
    # per class.
    Dense(num_classes, activation='softmax'),
])
model.summary()

# Configure the learning process: logarithmic (cross-entropy) loss for
# multi-class classification, the Adam optimizer (gradient descent with an
# adaptive learning rate), and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs (one epoch = one pass through the whole training data),
# using 100 images per gradient descent step and holding out 20% of the
# training data for validation.
model.fit(
    x_train,
    y_train,
    epochs=20,
    batch_size=100,
    validation_split=0.2,
)
# Loss and accuracy of the feed-forward model on the test set.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# The model expects a leading batch dimension, so the single flattened image
# x_test[0] (shape (784,)) is wrapped into an array of shape (1, 784). This
# conversion removes the error "expected dense_input to have shape (784,)...".
single_image_batch = np.array([x_test[0]])

# Sequential.predict_classes was removed in TensorFlow 2.6. For a softmax
# output layer the predicted class is the argmax of the predicted
# probabilities, which is what predict_classes used to return.
# verbose=0 keeps the call silent, as predict_classes was by default.
print(np.argmax(model.predict(single_image_batch, verbose=0), axis=-1))

# array dimensions:
print(x_test[0].ndim)
print(single_image_batch.ndim)

# Ground truth for the same image: its one-hot label and the index of the
# non-zero entry. Printed explicitly because bare expressions are only
# displayed inside a notebook cell.
print(y_test[0])
print(np.argwhere(y_test[0]))
The NN below has two extra convolutional layers. The effect of these is dramatic: they reduce the amount of training required from 20 epochs for the FFNN above to 4 epochs below. Despite this reduction in the amount of training, the accuracy increases by two percentage points (and already in the first epoch it goes from 84% to 93%).
from tensorflow.keras.layers import Flatten, Conv2D
# Convolutional classifier: two 3x3 convolutional layers in front of the same
# kind of dense head used by the feed-forward model.
cnv_model = Sequential([
    # First convolutional layer: 12 filters with a 3x3 kernel. The input is
    # one img_rows x img_cols image with a single channel.
    Conv2D(
        12,
        kernel_size=(3, 3),
        input_shape=(img_rows, img_cols, 1),
        activation='relu',
    ),
    # Second convolutional layer: 20 filters with a 3x3 kernel.
    # (Removing an extra convolution layer resulted in slightly improved
    # accuracy, O(1e-3).)
    Conv2D(20, kernel_size=(3, 3), activation='relu'),
    # Flatten the output of the previous layers into a 1D representation
    # for each image.
    Flatten(),
    # This dense layer has an order of magnitude more parameters than the
    # first dense layer of the feed-forward model because of its input size:
    # with Keras' default 'valid' padding each 3x3 convolution trims two
    # pixels per side length (28 -> 26 -> 24), so Flatten emits
    # 24*24*20 = 11520 values per image. That gives 11520*25 + 25 = 288025
    # parameters here versus 784*25 + 25 = 19625 there.
    Dense(25, activation='relu'),
    # Prediction layer: softmax turns the outputs into one probability
    # per class.
    Dense(num_classes, activation='softmax'),
])
cnv_model.summary()
# Same learning configuration as the feed-forward model.
cnv_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The convolutions need 4-dimensional input, so the images are reshaped to
# (samples, img_rows, img_cols, 1); the last dimension is the single color
# channel of the gray-scale images.
cnv_model.fit(
    x_train_i.reshape(-1, img_rows, img_cols, 1),
    y_train,
    epochs=4,  # notice the smaller number of epochs!
    batch_size=100,  # number of images for each gradient descent step
    validation_split=0.2,
)

# Loss and accuracy of the convolutional model on the test set.
score2 = cnv_model.evaluate(
    x_test_i.reshape(-1, img_rows, img_cols, 1),
    y_test,
    verbose=0,
)
print('Test loss:', score2[0])
print('Test accuracy:', score2[1])